# Population lookup keyed by two-letter postal code. Besides the 50 states it
# lists DC and the territories AS, GU, MP, PR and VI (56 rows in total).
# Each code sits next to its own figure so a pair cannot drift out of
# alignment; local() keeps the helper vector out of the global environment.
df_population <- local({
  population_by_state <- c(
    AK = 731545, AL = 4903185, AR = 3017804, AS = 55465,
    AZ = 7278717, CA = 39512223, CO = 5758736, CT = 3565287,
    DC = 705749, DE = 973764, FL = 21477737, GA = 10617423,
    GU = 165768, HI = 1415872, IA = 3155070, ID = 1787065,
    IL = 12671821, IN = 6732219, KS = 2913314, KY = 4467673,
    LA = 4648794, MA = 6892503, MD = 6045680, ME = 1344212,
    MI = 9986857, MN = 5639632, MO = 6137428, MP = 56882,
    MS = 2976149, MT = 1068778, NC = 10488084, ND = 762062,
    NE = 1934408, NH = 1359711, NJ = 8882190, NM = 2096829,
    NV = 3080156, NY = 19453561, OH = 11689100, OK = 3956971,
    OR = 4217737, PA = 12801989, PR = 3193694, RI = 1059361,
    SC = 5148714, SD = 884659, TN = 6829174, TX = 28995881,
    UT = 3205958, VA = 8535519, VI = 106977, VT = 623989,
    WA = 7614893, WI = 5822434, WV = 1792147, WY = 578759
  )
  # unname() stops data.frame() from adopting the codes as row names.
  data.frame(
    state = names(population_by_state),
    population = unname(population_by_state)
  )
})
# National daily feed from the covidtracking.com API.
df_daily <- fread("https://covidtracking.com/api/v1/us/daily.csv")

# Per-state daily feed: every NA is replaced by 0, rows are kept only for
# codes present in df_population (the inner join also adds `population`), and
# the yyyymmdd `date` values are parsed into Date objects.
df_states <- fread("https://covidtracking.com/api/v1/states/daily.csv") %>%
  replace(is.na(.), 0) %>%
  inner_join(df_population, by = "state") %>%
  mutate(date = as.Date(as.character(date), format = "%Y%m%d"))

# Colour values of the first palette in ggthemes' "regular" Tableau set.
tableau10 <-
  ggthemes_data[["tableau"]][["color-palettes"]][["regular"]][[1]][["value"]]

# Earliest and latest dates in the state data, kept as character strings.
first_day <- as.character(min(df_states[["date"]]))
today <- as.character(max(df_states[["date"]]))
head(df_states, n = 3)

| date | state | positive | negative | pending | hospitalizedCurrently | hospitalizedCumulative | inIcuCurrently | inIcuCumulative | onVentilatorCurrently | onVentilatorCumulative | recovered | dataQualityGrade | lastUpdateEt | dateModified | checkTimeEt | death | hospitalized | dateChecked | totalTestsViral | positiveTestsViral | negativeTestsViral | positiveCasesViral | fips | positiveIncrease | negativeIncrease | total | totalTestResults | totalTestResultsIncrease | posNeg | deathIncrease | hospitalizedIncrease | hash | commercialScore | negativeRegularScore | negativeScore | positiveScore | score | grade | population |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2020-06-25 | AK | 816 | 98636 | 0 | 14 | 0 | 0 | 0 | 2 | 0 | 513 | A | 6/25/2020 00:00 | 2020-06-25T00:00:00Z | 06/24 20:00 | 12 | 0 | 2020-06-25T00:00:00Z | 99452 | 0 | 0 | 0 | 2 | 24 | 3332 | 99452 | 99452 | 3356 | 99452 | 0 | 0 | 74506565ff9f0d6621e232e1e62239d658786e81 | 0 | 0 | 0 | 0 | 0 | 0 | 731545 |
| 2020-06-25 | AL | 33206 | 336252 | 0 | 693 | 2612 | 0 | 761 | 0 | 447 | 18866 | B | 6/25/2020 11:00 | 2020-06-25T11:00:00Z | 06/25 07:00 | 896 | 2612 | 2020-06-25T11:00:00Z | 0 | 0 | 0 | 32753 | 1 | 1142 | 9557 | 369458 | 369458 | 10699 | 369458 | 5 | 45 | 17d9d87e1df190897ce93d3fcbb382e6dac460e3 | 0 | 0 | 0 | 0 | 0 | 0 | 4903185 |
| 2020-06-25 | AR | 18062 | 259318 | 0 | 284 | 1245 | 0 | 0 | 61 | 194 | 12127 | A | 6/25/2020 15:20 | 2020-06-25T15:20:00Z | 06/25 11:20 | 240 | 1245 | 2020-06-25T15:20:00Z | 0 | 0 | 0 | 18062 | 5 | 687 | 4827 | 277380 | 277380 | 5514 | 277380 | 0 | 31 | 651dd04526a06699eef6c83fc27f1850be5e46cb | 0 | 0 | 0 | 0 | 0 | 0 | 3017804 |
# Rhode Island: cumulative deaths, daily new positives and current
# hospitalisations on a shared date axis.
# Layers are combined with ggplot2's `+` operator (not `%>% +`), and the
# series key is drawn once per label with annotate() rather than once per
# data row with geom_label().
df_states %>%
  filter(state == "RI") %>%
  ggplot(mapping = aes(x = date)) +
  # Series key: each label uses the colour of the series it names
  # (hospitalizedCurrently = tableau10[1], positiveIncrease = tableau10[2]).
  annotate(
    "label",
    x = as.Date("2020-03-01"), y = 900, label = "death",
    color = "black", size = 2, hjust = 0
  ) +
  annotate(
    "label",
    x = as.Date("2020-03-01"), y = 800, label = "positiveIncrease",
    color = tableau10[2], size = 2, hjust = 0
  ) +
  annotate(
    "label",
    x = as.Date("2020-03-01"), y = 700, label = "hospitalizedCurrently",
    color = tableau10[1], size = 2, hjust = 0
  ) +
  # Value of `positive` written vertically at y = 1050 for every date.
  geom_text(
    mapping = aes(y = 1050, label = positive),
    color = tableau10[9], size = 2, angle = 90, hjust = 0
  ) +
  # death
  geom_line(
    mapping = aes(y = death),
    alpha = 0.7, color = "black", size = LINE_SIZE
  ) +
  geom_text(
    mapping = aes(x = date - 0.5, y = death + 10, label = death),
    color = "black", size = 1.5
  ) +
  geom_point(mapping = aes(y = death), color = "black", shape = 10) +
  # hospitalizedCurrently
  geom_line(
    mapping = aes(y = hospitalizedCurrently),
    alpha = 0.7, color = tableau10[1], size = LINE_SIZE
  ) +
  geom_text(
    mapping = aes(
      x = date - 0.5,
      y = hospitalizedCurrently + 10,
      label = hospitalizedCurrently
    ),
    color = tableau10[1], size = 1.5
  ) +
  geom_point(
    mapping = aes(y = hospitalizedCurrently),
    color = tableau10[1], shape = 15
  ) +
  # positiveIncrease
  geom_line(
    mapping = aes(y = positiveIncrease),
    alpha = 0.7, color = tableau10[2], size = LINE_SIZE
  ) +
  geom_text(
    mapping = aes(
      x = date - 0.5,
      y = positiveIncrease + 10,
      label = positiveIncrease
    ),
    color = tableau10[2], size = 1.5
  ) +
  geom_point(mapping = aes(y = positiveIncrease), color = tableau10[2]) +
  scale_x_date(
    breaks = seq(as.Date(first_day), as.Date(today), by = "day")
  ) +
  xlab("Date") +
  ylab("") +
  ggtitle("RI")

# All states combined: per-date sums of daily new positives and current
# hospitalisations.
df_states %>%
  group_by(date) %>%
  summarise(
    positiveIncrease = sum(positiveIncrease),
    hospitalizedCurrently = sum(hospitalizedCurrently)
  ) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = date)) +
  # Series key, colour-matched to the lines below.
  annotate(
    "label",
    x = as.Date(first_day), y = 50000, label = "hospitalizedCurrently",
    color = tableau10[1], size = 2, hjust = 0
  ) +
  annotate(
    "label",
    x = as.Date(first_day), y = 55000, label = "positiveIncrease",
    color = tableau10[2], size = 2, hjust = 0
  ) +
  # hospitalizedCurrently
  geom_line(
    mapping = aes(y = hospitalizedCurrently),
    alpha = 0.7, color = tableau10[1], size = LINE_SIZE
  ) +
  geom_text(
    mapping = aes(
      x = date - 0.5,
      y = hospitalizedCurrently + 1000,
      label = hospitalizedCurrently
    ),
    color = tableau10[1], size = 1.5
  ) +
  geom_point(
    mapping = aes(y = hospitalizedCurrently),
    color = tableau10[1], shape = 15
  ) +
  # positiveIncrease
  geom_line(
    mapping = aes(y = positiveIncrease),
    alpha = 0.7, color = tableau10[2], size = LINE_SIZE
  ) +
  geom_text(
    mapping = aes(
      x = date - 0.5,
      y = positiveIncrease + 1000,
      label = positiveIncrease
    ),
    color = tableau10[2], size = 1.5
  ) +
  geom_point(mapping = aes(y = positiveIncrease), color = tableau10[2]) +
  scale_x_date(
    breaks = seq(as.Date(first_day), as.Date(today), by = "day")
  ) +
  xlab("Date") +
  ylab("") +
  ggtitle("US - positiveIncrease & hospitalizedCurrently ")
## `summarise()` ungrouping output (override with `.groups` argument)
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Daily new positive cases, one facet per state: a loess trend drawn first,
# then the raw series as a line with points on top.
# Layers are combined with ggplot2's `+` operator (not `%>% +`); the shared
# x/y mapping is declared once in ggplot() and inherited by every layer.
df_states %>%
  ggplot(mapping = aes(x = date, y = positiveIncrease)) +
  geom_smooth(color = "gray", alpha = 0.3, method = "loess") +
  geom_line(alpha = 0.7, color = tableau10[2], size = LINE_SIZE) +
  geom_point(color = tableau10[2], size = 1) +
  scale_x_date(
    breaks = seq(as.Date(first_day), as.Date(today), by = "2 days")
  ) +
  # scales = "free": each state's panel gets its own axis ranges.
  facet_wrap(~ state, ncol = 6, scales = "free") +
  xlab("Date") +
  ylab("") +
  ggtitle("US - positiveIncrease by state")
## `geom_smooth()` using formula 'y ~ x'
# Current hospitalisations, one facet per state: a loess trend drawn first,
# then the raw series as a line with points on top.
# Layers are combined with ggplot2's `+` operator (not `%>% +`); the shared
# x/y mapping is declared once in ggplot() and inherited by every layer.
df_states %>%
  ggplot(mapping = aes(x = date, y = hospitalizedCurrently)) +
  geom_smooth(color = "gray", alpha = 0.3, method = "loess") +
  geom_line(alpha = 0.7, color = tableau10[1], size = LINE_SIZE) +
  geom_point(color = tableau10[1], size = 1) +
  scale_x_date(
    breaks = seq(as.Date(first_day), as.Date(today), by = "2 days")
  ) +
  # scales = "free": each state's panel gets its own axis ranges.
  facet_wrap(~ state, ncol = 6, scales = "free") +
  xlab("Date") +
  ylab("") +
  ggtitle("US - hospitalizedCurrently by state")
## `geom_smooth()` using formula 'y ~ x'
# Share of each day's new test results that were positive (y) plotted against
# the cumulative share of the population tested (x), one facet per state.
# The axis labels now match the mapping: x is testedPopulationRate and y is
# testPositiveRate (they were previously swapped).
# Layers are combined with ggplot2's `+` operator (not `%>% +`).
df_states %>%
  mutate(
    testPositiveRate = positiveIncrease / totalTestResultsIncrease,
    testedPopulationRate = totalTestResults / population
  ) %>%
  ggplot(mapping = aes(x = testedPopulationRate, y = testPositiveRate)) +
  geom_smooth(color = "gray", alpha = 0.3, method = "loess") +
  geom_line(alpha = 0.7, color = tableau10[4], size = LINE_SIZE) +
  geom_point(color = tableau10[4], size = 1) +
  scale_x_continuous(limits = c(0, 0.20), breaks = seq(0, 1, by = 0.005)) +
  scale_y_continuous(limits = c(-.25, 1.25), breaks = seq(0, 1, by = 0.2)) +
  facet_wrap(~ state, ncol = 6, scales = "free") +
  xlab("testedPopulationRate") +
  ylab("testPositiveRate") +
  ggtitle("US - testPositiveRate against testedPopulationRate")
## `geom_smooth()` using formula 'y ~ x'
# Cumulative deaths per million residents, one facet per state.
# The rate was previously computed as death / population * 10000, which is
# deaths per 10,000 and not per million. It now uses 1e6, and the y limits
# and breaks are scaled by the same factor of 100 (0-15 -> 0-1500,
# step 3 -> 300) so the plotted shape and clipping are unchanged.
# The "%" suffix is dropped from the y label because the value is not a
# percentage.
# Layers are combined with ggplot2's `+` operator (not `%>% +`).
df_states %>%
  mutate(deathPerMillion = death / population * 1e6) %>%
  ggplot(mapping = aes(x = date, y = deathPerMillion)) +
  geom_line(alpha = 0.7, color = tableau10[3], size = LINE_SIZE) +
  geom_point(color = tableau10[3], size = 1) +
  scale_x_date(
    breaks = seq(as.Date(first_day), as.Date(today), by = "2 days")
  ) +
  scale_y_continuous(limits = c(0, 1500), breaks = seq(0, 1500, by = 300)) +
  facet_wrap(~ state, ncol = 6, scales = "free") +
  xlab("date") +
  ylab("deathPerMillion") +
  ggtitle("US - deathPerMillion by state")

# Daily test results (new positives plus new negatives), one facet per state.
df_states %>%
  mutate(testResultsIncrease = positiveIncrease + negativeIncrease) %>%
  ggplot(mapping = aes(x = date, y = testResultsIncrease)) +
  geom_line(alpha = 0.7, color = tableau10[7], size = LINE_SIZE) +
  geom_point(color = tableau10[7], size = 1) +
  scale_x_date(
    breaks = seq(as.Date(first_day), as.Date(today), by = "2 days")
  ) +
  facet_wrap(~ state, ncol = 6, scales = "free") +
  xlab("date") +
  ylab("testResultsIncrease") +
  ggtitle("US - testResultsIncrease by state")